In [5]:
from os.path import getsize
from glob import glob
from time import time
import netCDF4

def do_raw (chunks):
    with open(fn, 'rb') as d:
        for start, amount in chunks:
            d.seek(start)
            d.read(amount)

def do_netcdf (chunks):
    with netCDF4.Dataset(fn) as d:
        v = d.variables[name]
        for (t0, t1), h in chunks:
            v[t0:t1,:h,...]

name = 'ua'
fn = glob('/opt/data/IPSL-CM5A-MR/rcp85/6hr/atmos/6hrLev/r1i1p1/latest/%s/*.nc' % name)[0]
print fn


/opt/data/IPSL-CM5A-MR/rcp85/6hr/atmos/6hrLev/r1i1p1/latest/ua/ua_6hrLev_IPSL-CM5A-MR_rcp85_r1i1p1_2006010103-2015123121.nc

In [43]:
method = (
    ('raw', 'contiguous', 'few'),
    ('raw', 'contiguous', 'many'),
    ('raw', 'spread', None),
    ('netcdf', 'contiguous', None),
    ('netcdf', 'spread', None)
)[4]

with netCDF4.Dataset(fn) as d:
    shape = d.variables[name].shape
sz = getsize(fn)
n_cells = reduce(int.__mul__, shape)
cell_sz = float(sz) / n_cells
blocks = shape[0] / shape[1]
slcs = shape[1] * blocks
slc_cells = shape[2] * shape[3]
slc_sz = int(round(slc_cells * cell_sz))
read_ts = 1000
read_hs = 25
block_cells = shape[1] * slc_cells
block_sz = int(round(block_cells * cell_sz))

if method[0] == 'raw':
    do = do_raw
    if method[1] == 'contiguous':
        if method[2] == 'few':
            chunks = [(i * block_sz, block_sz) for i in xrange(blocks)]
        else:
            chunks = [(i * block_sz, slc_sz) for i in xrange(slcs)]
    else:
        chunks = [(i * block_sz, slc_sz) for i in xrange(slcs)]
    print '# bytes:', sum(zip(*chunks)[1])
else:
    do = do_netcdf
    if method[1] == 'contiguous':
        chunks = [((read_hs * i, read_hs * (i + 1)), shape[1]) for i in xrange(blocks / read_hs)]
        chunks.append(((chunks[-1][0][1], chunks[-1][0][1] + blocks % read_hs), shape[1]))
    else:
        chunks = [((read_ts * i, read_ts * (i + 1)), 1) for i in xrange(slcs / read_ts)]
        chunks.append(((chunks[-1][0][1], chunks[-1][0][1] + slcs % read_ts), 1))
    print '# numbers:', sum((t1 - t0) * h for (t0, t1), h in chunks) * slc_cells * cell_sz

print '# chunks:', len(chunks)

t0 = time()
do(chunks)
print time() - t0

In [43]:
# raw, contiguous, few
6.0811650753, 5.89195919037, 6.07426786423
# raw, contiguous, many
76.244145155, 77.6571240425, 78.8810811043
# raw, spread (many)
76.6748468876, 76.8060109615, 76.8620369434
# netcdf, contiguous
12.133687973, 12.1888580322, 12.0239961147
# netcdf, spread
113.553792953, 110.792984962, 108.883361101